library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Import the Citibike sample data.

# Load the Citibike ride sample from a CSV file on local disk.
# NOTE(review): the path is absolute and machine-specific; a project-relative
# path would make this easier to reproduce elsewhere.
citibike_sample_12k <- read.csv(
  "/Users/hongdachen/Desktop/citibike_sample_12k.csv"
)

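Clean the data: parse the start and end timestamps, compute each ride's duration in minutes, label the start month, and keep rides longer than 0 and shorter than 200 minutes.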

# Build the data set used for the box plot:
#   1. parse the start/end timestamps with ymd_hms(),
#   2. derive the ride length in minutes and the labelled start month,
#   3. keep only rides lasting more than 0 and less than 200 minutes
#      (rows whose duration is NA are dropped by filter() as well).
citibike_boxplot <- citibike_sample_12k %>%
  mutate(
    started_at = ymd_hms(started_at),
    ended_at = ymd_hms(ended_at)
  ) %>%
  mutate(
    duration_min = as.numeric(
      difftime(ended_at, started_at, units = "mins")
    ),
    month = month(started_at, label = TRUE, abbr = TRUE)
  ) %>%
  filter(duration_min > 0 & duration_min < 200)

Box plot of ride duration by month.

# Interactive box plot of ride duration (minutes) by start month.
# Each month gets its own box, coloured from the viridis palette; only the
# outlier points are drawn on top of the boxes.
citibike_boxplot %>%
  plot_ly(
    type = "box",
    x = ~month,
    y = ~duration_min,
    color = ~month,
    colors = "viridis",
    boxpoints = "outliers",
    marker = list(size = 6, opacity = 1),
    line = list(width = 1)
  ) %>%
  layout(
    title = "Monthly Ride Duration Distribution (Citibike)",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Ride Duration (minutes)")
  )